# load packages
library(tidyverse)
library(RColorBrewer)
library(lubridate)
library(gganimate)
library(ggimage)
# Import the raw inputs from the working directory:
#   launches -> launch records used in Task 1
#   usda     -> USDA nutrient table used in Task 2
launches <- readr::read_csv(file = "launches.csv")
usda <- readr::read_csv(file = "usda_nutrients.csv")
# Launches per country per year, for the line chart.
# state_code is recoded into a readable `country` label ("RU" and "SU" are
# merged into "Russia/USSR"); fct_lump() then keeps the 4 most frequent
# levels and pools every other level into "Other".
# Result columns: country, launch_year, n (number of launches).
launch_1 <- launches %>%
  mutate(
    country = fct_collapse(
      state_code,
      "Russia/USSR" = c("RU", "SU"),
      "United States" = "US",
      "China" = "CN",
      "France" = "F"
    ),
    country = fct_lump(country, n = 4)
  ) %>%
  # Naming the columns in count() returns an ungrouped tibble, so no leftover
  # grouping is carried into the join and plot that use this data frame.
  count(country, launch_year)
# Launch outcomes per country: one row per (country, category) pair with the
# number of launches in `n`. Used to look up how many launches of each
# outcome category every country has.
# Country labels are built the same way as for the yearly counts: state_code
# is recoded, "RU" and "SU" are merged, and everything outside the 4 most
# frequent levels is pooled into "Other".
launch_2 <- launches %>%
  mutate(
    country = fct_collapse(
      state_code,
      "Russia/USSR" = c("RU", "SU"),
      "United States" = "US",
      "China" = "CN",
      "France" = "F"
    ),
    country = fct_lump(country, n = 4)
  ) %>%
  # count() with explicit columns keeps only those columns plus `n` and
  # returns an ungrouped tibble (no separate select()/group_by() needed).
  count(country, category)
# Per-country totals used to derive the success rate:
#   n              -> total number of launches
#   number_success -> number of successful launches
# The success count is computed from the data instead of being typed in by
# hand and assigned by row position, so each value stays attached to the
# right country and follows any change in launches.csv.
# For reference, the values previously read off launch_2 were:
# C-289, F-284, USSR-3024, US-1585, Other-202.
# NOTE(review): assumes category == "O" marks a successful launch (with "F"
# meaning failure), per the dataset's data dictionary -- confirm against the
# counts in launch_2.
launch_success <- launches %>%
  mutate(
    country = fct_collapse(
      state_code,
      "Russia/USSR" = c("RU", "SU"),
      "United States" = "US",
      "China" = "CN",
      "France" = "F"
    ),
    country = fct_lump(country, n = 4)
  ) %>%
  group_by(country) %>%
  summarise(
    n = n(),
    # rows with a missing category are not counted as successes
    number_success = sum(category == "O", na.rm = TRUE)
  )
# Success rate per country: successful launches divided by all launches.
# Despite its name, `percent` is a proportion (it is not multiplied by 100).
# Only the country label and the rate are kept.
success <- mutate(launch_success, percent = number_success / n)
success <- select(success, country, percent)
# Plot data: yearly launch counts with each country's success rate attached.
# Every row of launch_1 is kept; rows are matched on `country`.
launch_data <- launch_1 %>%
  left_join(success, by = "country")
# Path of the rocket icon that geom_image() draws at each data point
rocket <- "rocket.png"

# Animated line chart of launches per year, one line per country group.
# Line, icon and label color all encode the country's success rate
# (`percent`, inherited from the top-level aes()).
launch_graph <- ggplot(
  launch_data,
  aes(x = launch_year, y = n, group = country, color = percent)
) +
  geom_line() +
  geom_image(aes(image = rocket)) +
  # Country name drawn next to the series, nudged to the right of the point
  geom_text(
    aes(label = country),
    position = position_nudge(x = 9, y = 0.04),
    size = 5
  ) +
  scale_x_continuous(
    expand = c(0, 0),
    limits = c(1955, 2020),
    breaks = seq(1955, 2020, by = 5)
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 120),
    breaks = seq(0, 120, by = 15)
  ) +
  scale_color_distiller(
    palette = "YlGnBu",
    direction = 1,
    name = "Percent of Successful\nLaunches by Country"
  ) +
  labs(x = "Launch Year", y = "Number of Launches") +
  theme(
    panel.background = element_rect(fill = "grey86"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "black", fill = NA)
  ) +
  # Reveal the data progressively along launch_year. Each series is
  # identified by the `group = country` aesthetic set above. In released
  # gganimate the first argument of transition_reveal() is `along`; the
  # older (id, along) call form would make `country` the reveal dimension.
  transition_reveal(launch_year)

# Print the plot object, which renders the animation
launch_graph
Figure 1: Top Four Countries in Space Launches Since 1955. The four countries with the largest numbers of space launches are shown; all other countries are aggregated into the category “Other”. Color indicates the proportion of successful launches out of the total launches per country in the time period from 1955 to 2018.
# load packages for Task 2 to avoid conflicts in task 1
library(factoextra)
library(ggbiplot)
# Data wrangling: keep rows that belong to one of the two produce food groups
# ("Vegetables and Vegetable Products", "Fruits and Fruit Juices") and whose
# short description contains "RAW".
nutrients <- usda %>%
  filter(
    food_group %in% c(
      "Vegetables and Vegetable Products",
      "Fruits and Fruit Juices"
    ),
    str_detect(short_descrip, pattern = "RAW")
  )
# PCA on the nutrient variables (columns 9 to 30, Protein through Zinc), with
# each variable scaled to unit variance before the decomposition.
# NOTE(review): columns are picked by position -- confirm that 9:30 still
# covers Protein..Zinc if the layout of the CSV changes.
# `scale.` is written out in full: prcomp() has no argument named `scale`,
# so `scale = TRUE` only worked through partial argument matching.
nutrient_pca <- prcomp(nutrients[9:30], scale. = TRUE)

# Percentage of total variance captured by each principal component; used for
# the axis labels so they always agree with the fitted model.
nutrient_var_pct <- 100 * nutrient_pca$sdev^2 / sum(nutrient_pca$sdev^2)

# Biplot: observations colored by food group, variable arrows labeled
fviz_pca_biplot(
  nutrient_pca,
  habillage = nutrients$food_group,
  palette = c("darkorange2", "darkgreen"),
  label = "var",
  repel = TRUE,
  col.var = "midnightblue"
) +
  labs(
    title = "PCA Biplot: Fruit and Vegetable Nutrients",
    x = sprintf("Dimension 1 (%.1f%%)", nutrient_var_pct[1]),
    y = sprintf("Dimension 2 (%.1f%%)", nutrient_var_pct[2])
  ) +
  theme_bw()
Trends
From the biplot above, the following trends can be observed:
(1) There is a negative correlation between sugar and protein.
(2) There is a positive correlation between vitamin E and fiber.
(3) There is no correlation between sugar and vitamin B12.